Create figures for the clinical paper

All data is coming from tables for the paper

# labels for time points
timeLab  = c("Baseline", "Post Run-in", "Week 8")

# set colors
recistCol = c("Non-responder" = "red","Responder" = "blue")
cbrCol = c("CR,PR,SD" = "blue","PD" = "red")
subtypeCol = c("HR+" = "red", "TNBC" = "blue")
timeCol = setNames(c("black", 'grey45', 'grey75'), timeLab)

# set root folder
rootDir = "./"

# Load data patient data
dta_pts = read.csv(paste0(rootDir,"input_tables/table_24patient_info.csv"), row.names = 1)
dta_pts$ID = rownames(dta_pts)

# remove patients with no response
dta_pts = dta_pts %>% filter(Best_Resp != "Unknown")
dim(dta_pts)
## [1] 20 22
table(dta_pts$RECIST)
## 
## Non-responder     Responder 
##            15             5
# load cluster proportions
dat = read.csv(paste0(rootDir,"input_tables/table_per_sample_values.csv"))
props = grep("proportion", colnames(dat), value = T)

#===========================
# add patient info 
dat = cbind(dat,  dta_pts[dat$patientID,])

Cluster proportions

Table with p-values

Pairwise Wilcoxon test was used for each time point comparing responders vs non-responders. And paired Wilcoxon test was used for baseline vs post run-in and post run-in vs week 8. We took the differences between all time points and then compare those differences between responders vs non-responders to see if there is significant difference in changes after treatment

prop_res = c()
for (k in props)
{
  prop_res = rbind(prop_res, c(cluster = gsub("_proportion","",k), runWilcox(k, dat, dta_pts)))
}

knitr::kable(prop_res, "simple",digits = 3)
cluster BvsT1 T1vsT2 BvsT2 B_RvsNR T1_RvsNR T2_RvsNR d_T1_B_RvsNR d_T2_B_RvsNR d_T2_T1_RvsNR mean_d_T1_B_R mean_d_T1_B_NR mean_d_T2_B_R mean_d_T2_B_NR mean_d_T2_T1_R mean_d_T2_T1_NR
B 0.611 0.193 0.02 0.037 0.328 0.171 0.879 0.114 0.476 -1.096 -0.481 -7.104 -2.177 -6.21 -1.915
Baso 0.001 1 0.432 0.792 1 1 0.574 0.61 0.61 -0.416 -0.204 -0.324 -0.07 0.115 0.092
CD4_T 0.818 0.432 0.77 0.234 0.064 0.61 0.506 0.257 0.114 -4.535 -0.108 5.56 -4.778 11.409 -1.602
CD8_T 0.378 0.232 0.275 0.721 1 0.914 0.799 0.476 0.914 1.289 1.996 2.246 1.511 2.785 1.425
cDC1 0.185 0.275 0.275 0.598 0.752 0.476 0.205 0.762 0.352 0.058 -0.455 0.056 -0.002 -0.029 0.243
cDC2 0.19 0.846 0.037 0.234 0.646 1 0.574 0.257 0.762 -0.461 -0.101 -0.802 -0.386 -0.283 0.007
DNT 0.587 0.906 0.636 0.562 0.959 0.61 1 0.914 0.476 -0.026 -0.128 -1.618 -0.143 -1.584 -0.243
DPT 0.818 0.16 0.77 0.383 0.574 0.476 0.959 0.476 0.352 0.024 0.068 0.313 0.62 0.26 0.456
GMDSC 0.698 0.695 0.695 0.37 0.646 0.61 1 0.61 0.61 -0.003 0.035 0 -0.11 0.005 -0.146
MMDSC_I 0.404 0.232 0.625 0.959 0.879 0.762 0.721 0.476 0.61 0.126 1.346 0.319 -0.27 0.139 -0.941
MMDSC_II 0.487 0.922 0.695 0.958 0.833 0.61 1 0.914 0.914 0.045 0.047 -0.09 0.065 -0.178 -0.018
MMDSC_III 0.159 0.432 1 0.721 0.879 0.762 0.799 0.914 0.914 0.501 0.04 0.124 0.294 -0.112 -0.192
Mono_I 1 0.922 0.557 0.646 0.879 0.61 0.328 0.476 0.257 1.554 -1.996 1.086 -1.226 -1.009 0.789
Mono_II 0.842 0.906 0.064 0.833 1 0.748 0.833 0.914 0.914 0.051 0.099 0.143 0.367 -0.084 0.455
Mono_III 0.459 0.193 1 0.234 0.646 0.762 0.574 0.61 1 0.401 7.676 -3.497 1.325 -6.164 -3.465
Neu 0.017 0.169 1 0.195 0.799 0.914 0.027 0.067 0.61 0.047 -3.725 3.14 -1.235 3.081 -0.1
NK_like 0.517 0.322 0.105 0.104 0.027 0.476 0.721 0.352 0.476 3.156 -3.65 1.064 6.383 -2.293 5.625
pDC 0.051 0.625 0.232 0.562 0.328 0.171 0.442 0.61 0.476 -0.723 -0.522 -0.595 -0.521 0.223 -0.771
UA 0.89 0.232 0.275 0.154 0.328 0.762 0.959 0.171 0.038 0.006 0.064 -0.024 0.354 -0.069 0.3
write.csv(prop_res,file = "table_cytof_wilcox_props.csv", row.names = F)

Marker expression by clusters

#==========================
# file with marker expression
cytof_dat = read.csv(paste0(rootDir,"input_tables/table_cytof_marker_expression.csv"))


markers = colnames(cytof_dat)[4:ncol(cytof_dat)]
#===========================
# add patient info 
cytof_dat = cbind(cytof_dat,  dta_pts[cytof_dat$patientID,])
cat("The number of patients with CYTOF data:", length(intersect(cytof_dat$patientID, dta_pts$ID)))
## The number of patients with CYTOF data: 17
# print the number of pairs between time points
mat = printPairs(cytof_dat)
## The number of paired samples between Baseline and Post Run-in : 17 
## The number of paired samples between Baseline and Week 8 : 10 
## The number of paired samples between Week 8 and Post Run-in : 10
# take only baseline and time 1 values
df_BvT1 <- cytof_dat %>% filter(timePoint %in% c('Baseline','Post Run-in'))

# take only time 1 and time 2 values
df_T1vT2 <- cytof_dat %>% filter(timePoint %in% c('Week 8','Post Run-in'))

Table with p-values

Pairwise Wilcoxon test was used for each time point comparing responders vs non-responders. And paired Wilcoxon test was used for baseline vs post run-in and post run-in vs week 8. We took the differences between all time points and then compare those differences between responders vs non-responders to see if there is significant difference in changes after treatment

# Calculate p-values from Wilcoxon test: paired for time points and not paired for response

res = c()
# split by clusters
cytof_dat_byClust = split(cytof_dat, f = factor(cytof_dat$cluster))

for(j in names(cytof_dat_byClust))
{
  #
  d = cytof_dat_byClust[[j]]
  for (k in markers)
  {
    res = rbind(res, c(marker = k, cluster = j, runWilcox(k, d, dta_pts)))
  }
}

res = res[order(res[,'marker']),]

datatable(res)
write.csv(res,file = "table_cytof_wilcox_all_markers.csv", row.names = F)

Boxplot (Responders and Non-responders)

# make boxplots for markers/clusters that have p-value < 0.05 in d_T1_B_RvsNR
sets = data.frame(res)
sets = sets %>% filter(d_T1_B_RvsNR < 0.05 & cluster%in% c('B','CD4_T','CD8_T','DNT','DPT','NK_like',"cDC1"))

for (i in 1:nrow(sets))
{
  k = sets[i,'cluster']
  j = sets[i,'marker']
d = cytof_dat %>% filter(cluster == k)
  df = cbind(protein = d[,j], 
             d[,c("patientID", "timePoint", "RECIST","Subtype") ] )

    p = ggpaired(df , x="timePoint",y="protein", id = "patientID", 
                 line.size = 0.4, title = paste0(j,". ", k), 
                 line.color = "RECIST",palette =  recistCol, xlab="Time") +
      facet_wrap("RECIST") +
      scale_x_discrete(labels=timeLab)
    
    print(p)
}

# additionally, plot CD103 in cDC1 cluster

  k = "cDC1"
  j = "CD103"
d = cytof_dat %>% filter(cluster == k)
d = CreateAllFacet(d,"RECIST")
  df = cbind(protein = d[,j], 
             d[,c("patientID", "timePoint", "RECIST","Subtype", "facet") ] )

    p = ggpaired(df , x="timePoint",y="protein", id = "patientID", 
                 line.size = 0.4, title = paste0(j,". ", k), 
                 line.color = "RECIST",palette =  recistCol, xlab="Time") +
      facet_wrap("facet") +
      scale_x_discrete(labels=timeLab)
    
    print(p)

Boxplot (Baseline vs Post Run-in)

#ggpaired(df_BvT1 %>% filter(cluster %in% c("GMDSC")),
for (i in markers)
{
  p = ggpaired(df_BvT1, x="timePoint",y=i, id = "patientID", line.color = "RECIST", line.size = 1,
   palette =  recistCol, title = paste0(i,". Baseline vs Post Run-in"), xlab="Time", ylab="MMI") + 
    scale_x_discrete(labels=timeLab)+ 
    facet_wrap("cluster", scales = 'free') +
    theme(axis.text.x=element_text(angle = 45, hjust = 1))

  print(p)
}

Boxplot (Post Run-in vs Week 8)

for (i in markers)
{
  p = ggpaired(df_T1vT2, x="timePoint",y=i, id = "patientID", line.color = "RECIST", line.size = 1,
   palette =  recistCol, title = paste0(i,". Post Run-in vs Week 8"), xlab="Time", ylab="MMI") +
#    scale_x_discrete(labels=timeLab)+
    facet_wrap("cluster", scales = 'free') +
    theme(axis.text.x=element_text(angle = 45, hjust = 1))

  print(p)
}

sessionInfo()
## R version 4.3.1 (2023-06-16 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] Matrix_1.6-3      scales_1.2.1      gridExtra_2.3     DT_0.30          
## [5] data.table_1.14.8 ggrepel_0.9.4     ggpubr_0.6.0      ggplot2_3.4.4    
## [9] dplyr_1.1.2      
## 
## loaded via a namespace (and not attached):
##  [1] sass_0.4.7        utf8_1.2.3        generics_0.1.3    tidyr_1.3.0      
##  [5] rstatix_0.7.2     lattice_0.21-8    digest_0.6.33     magrittr_2.0.3   
##  [9] evaluate_0.23     grid_4.3.1        fastmap_1.1.1     jsonlite_1.8.7   
## [13] backports_1.4.1   purrr_1.0.2       fansi_1.0.4       crosstalk_1.2.0  
## [17] jquerylib_0.1.4   abind_1.4-5       cli_3.6.1         rlang_1.1.1      
## [21] ellipsis_0.3.2    munsell_0.5.0     withr_2.5.2       cachem_1.0.8     
## [25] yaml_2.3.7        tools_4.3.1       ggsignif_0.6.4    colorspace_2.1-0 
## [29] broom_1.0.5       vctrs_0.6.3       R6_2.5.1          lifecycle_1.0.4  
## [33] car_3.1-2         htmlwidgets_1.6.2 pkgconfig_2.0.3   pillar_1.9.0     
## [37] bslib_0.5.1       gtable_0.3.4      glue_1.6.2        Rcpp_1.0.11      
## [41] highr_0.10        xfun_0.39         tibble_3.2.1      tidyselect_1.2.0 
## [45] rstudioapi_0.15.0 knitr_1.45        farver_2.1.1      htmltools_0.5.5  
## [49] labeling_0.4.3    rmarkdown_2.25    carData_3.0-5     compiler_4.3.1